library(vegan) library(MASS) envdata<-read.csv('env.csv',header=T,row.names=1) sitegroup<-read.csv('Site_Groups.csv',header=T,row.names=1) #log 10 transform envdata envdata.tran<-data.trans(envdata,method='log',plot=F) #LDA env.lda<-lda(envdata.tran,sitegroup[,-2]) #look at it env.lda #compute the canonical scores (the scores for each obs on each canonical axis) env.lda.pred<-predict(env.lda) #look at it env.lda.pred #save the scores as a different object scores<-env.lda.pred$x scores #graphically examine separation to get a sense of how well LDA worked #bind the grouping variable to the data frame containing scores result<-as.data.frame(cbind(sitegroup[,-2],scores)) #check it result #note: here it gives site group as numbers 1-4 not MidAt, Ohio etc. #plot it box.plots(result,var='LD1',by='V1',notch=TRUE,varwidth=TRUE) hist.plots(result,var='LD1',by='V1') #classification success? #'confusion matrix' see how many were classified correctly env.table<-table(sitegroup[,-2],env.lda.pred$class) #look at it env.table #compute the overall correct classification rate (how many correct/correct + incorrect) sum(diag(env.table))/sum(env.table) #view the raw canonical coefficients env.lda$scaling #structure.. 'easier' to interpret than raw canonical coefficients lda.structure(scores,envdata.tran) #interpret: the most important variables that discriminate the groups along LD1 are elevation and road density #LDA: biplot plot(scores,type='n') text(scores,labels=sitegroup[,-2])